Preparation
Load statistical test function
#### ---------------- testchi2 ----------------
#' @title Compute the average salience of the topic and test significance of deviation
#' @name what
#' @description create a table and graphic of the topic
#' @param tabtest a table with variable trial, success and null.value
#' @param minsamp : Threshold of sample size requested for salience computation
#' @param mintest : Threshold of estimated value requested for chi-square test
testchi2<-function(tabtest=tabtest,
minsamp = 20,
mintest = 5)
{
tab<-tabtest
n<-dim(tab)[1]
# Compute salience if sample size sufficient (default : N>20)
tab$estimate <-NA
tab$salience <-NA
tab$chi2<-NA
tab$p.value<-NA
tab$estimate<-round(tab$success/tab$trial,5)
tab$salience<-tab$estimate/tab$null.value
# Chi-square test if estimated value sufficient (default : Nij* > 5)
for (i in 1:n) {
if(tab$trial[i]*tab$null.value[i]>=mintest) {
test<-prop.test(x=tab$success[i],n=tab$trial[i], p=tab$null.value[i],
alternative = "greater")
tab$chi2[i]<-round(test$statistic,2)
tab$p.value[i]<-round(test$p.value,5)
}
}
# }
return(tab)
}
What
Function
### ---------------- what ----------------
#' @title Compute the average salience of the topic
#' @name what
#' @description create a table and graphic of the topic
#' @param hc an hypercube prepared as data.table
#' @param subtop a subtag of the main tag (default = NA)
#' @param title Title of the graphic
what <- function (hc = hypercube,
what = "what",
subtop = NA,
title = "What ?")
{
tab<-hc
tab$what<-tab[[what]]
if (is.na(subtop)){tab$what <-tab$what !="_no_"}else {tab$what <- tab$what == subtop}
tab<-tab[,list(news = sum(news)),by = what]
tab$pct<-100*tab$news/sum(tab$news)
p <- plot_ly(tab,
labels = ~what,
values = ~pct,
type = 'pie') %>%
layout(title = title,
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
output<-list("table" = tab, "plotly" =p)
return(output)
}
Application 1 : covid topic
res<-what(hc_year)
res$table
res$plotly
Application n°2 : state subtopic
res <-hc_year %>% filter(states != "_no_") %>%
what(what = "states",
subtop ="RUS",
title = "Share of Russia in international news")
res$table
res$plotly
Application n°3 : macroregion subtopic
res <-hc_year %>% filter(regions != "_no_") %>%
what(what = "regions",
subtop ="OR_EU",
title = "Share of EU in macroregional news")
res$table
res$plotly
Who.What
function
#### ---------------- who.what ----------------
#' @title visualize variation of the topic between media
#' @name who.what
#' @description create a table of variation of the topic by media
#' @param hc an hypercube prepared as data.table
#' @param test : visualize test (TRUE) or salience (FALSE)
#' @param minsamp : Threshold of sample size requested for salience computation
#' @param mintest sample size of estimate for chi-square test (default = 5)
#' @param title Title of the graphic
who.what <- function (hc = hypercube,
what = "what",
subtop = NA,
test = FALSE,
minsamp = 20,
mintest = 5,
title = "Who says What ?")
{
tab<-hc
tab$what<-tab[[what]]
if (is.na(subtop)){tab$what <-tab$what !="_no_"}else {tab$what <- tab$what == subtop}
# {tab$what <-tab$what !="_no_"}
tab<-tab[,list(trial = sum(news),success=round(sum(news*what),0)),by = list(who)]
ref <-round(sum(tab$success)/sum(tab$trial),4)
tab$null.value<-ref
tab<-testchi2(tabtest=tab,
minsamp = minsamp,
mintest = mintest)
if (test==FALSE) {tab$index =tab$salience
tab$index[tab$index>4]<-4
tab<-tab[tab$trial > minsamp,]
mycol<-brewer.pal(7,"YlOrRd")
}
else {tab$index=1-tab$p.value
tab<-tab[tab$trial*tab$null.value>mintest,]
mycol<-rev(brewer.pal(7,"RdYlBu"))
mycol[4]<-"lightyellow"
}
p <- plot_ly(tab,
x = ~who,
y = ~estimate*100,
color= ~index,
colors= mycol,
hovertemplate = ~paste('Source: ',who,
'<br /> Total news : ', round(trial,0),
'<br /> Topic news : ', round(success,0),
'<br /> % observed : ', round(estimate*100,2),'%',
'<br /> % estimated : ', round(null.value*100,2),'%',
'<br /> Salience : ', round(salience,2),
'<br /> p.value : ', round(p.value,4)),
type = "bar") %>%
layout(title = title,
yaxis = list(title = "% news"),
barmode = 'stack')
output<-list("table" = tab, "plotly" =p)
return(output)
}
Applicaton n°1 : Covid Topic
An example of computation of the share of a non spatial topic (Covid)
in the full sample of news.
res <- hc_year %>%
who.what(what = "what",
title = "Share of Covid in total news",
test=FALSE)
res$table
res$plotly
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays
Application n°2 : State subtopic
An example of computation of the share of a national subtopic
(Russia) in the sample of news where the topic is present (news with at
least one state mentionned).
res <-hc_year %>% filter(states != "_no_") %>%
who.what(what = "states",
subtop ="RUS",
title = "Share of Russia in international news",
test=TRUE)
res$table
res$plotly
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays
Application n°3 : Macroregion subtopic
Same example applied to macroregion : what is the share of the
subtopic European Union in the subsample news where at least one
macroregion is mentionned.
res <-hc_year %>% filter(regions != "_no_") %>%
who.what(what = "regions",
subtop ="OR_EU",
title = "Share of EU in macroregional news",
test=TRUE)
res$table
res$plotly
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays
saveWidget(widget=res$plotly,
file = "widgets/widget_who_what.html",
selfcontained = T,
libdir="lib")
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays
When.What
function
#### ---------------- when.what ----------------
#' @title visualize variation of the topic through time
#' @name when.what
#' @description create a table of variation of the topic by media
#' @param test : visualize test (TRUE) or salience (FALSE)
#' @param minsamp : Threshold of sample size requested for salience computation
#' @param mintest sample size of estimate for chi-square test (default = 5)
#' @param title Title of the graphic
when.what <- function (hc = hypercube,
what = "what",
subtop = NA,
test = FALSE,
minsamp = 20,
mintest = 5,
title = "When is said What ?")
{
tab<-hc
tab$what<-tab[[what]]
if (is.na(subtop)){tab$what <-tab$what !="_no_"}else {tab$what <- tab$what == subtop}
# {tab$what <-tab$what !="_no_"}
tab<-tab[,list(trial = sum(news),success=round(sum(news*what),0)),by = list(when)]
ref <-round(sum(tab$success)/sum(tab$trial),4)
tab$null.value<-ref
tab<-testchi2(tabtest=tab,
minsamp = minsamp,
mintest = mintest)
if (test==FALSE) {tab$index =tab$salience
tab<-tab[tab$trial > minsamp,]
mycol<-brewer.pal(7,"YlOrRd")
}
else {tab$index=tab$p.value
tab<-tab[tab$trial*tab$null.value>mintest,]
mycol<-brewer.pal(7,"RdYlBu")
mycol[4]<-"lightyellow"
}
p <- plot_ly(tab,
x = ~as.character(when),
y = ~estimate*100,
color= ~index,
colors= mycol,
# hoverinfo = "text",
hovertemplate = ~paste('Time: ',when,
'<br /> Total news : ', round(trial,0),
'<br /> Topic news : ', round(success,0),
'<br /> % observed : ', round(estimate*100,2),'%',
'<br /> % estimated : ', round(null.value*100,2),'%',
'<br /> Salience : ', round(salience,2),
'<br /> p.value : ', round(p.value,4)),
type = "bar") %>%
layout(title = title,
yaxis = list(title = "% news"),
barmode = 'stack')
output<-list("table" = tab, "plotly" =p)
return(output)
}
Applicaton n°1 : Covid Topic
An example of computation of the share of a non spatial topic (Covid)
in the full sample of news by week for one media.
res <- hc_week %>% filter(who=="DEU_suddeu") %>%
when.what(what = "what",
title = "Share of Covid-19 topic in news published by Süddeutsche Zeitung",
test=FALSE)
res$table
res$plotly
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays
Application n°2 : State subtopic
Example of analysis of the share of news about Russia among news
mentionning one country, by month, for Le Figaro.
res <-hc_month %>% filter(states != "_no_") %>% filter(who=="FRA_figaro") %>%
when.what(what = "states",
subtop ="RUS",
title = "Share of Russia in international news published by Le Figaro",
test=TRUE)
res$table
res$plotly
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays
Application n°3 : Macroregion subtopic
Example of analysis of the share of news about European news among
news mentioning one macroregion, by year, for Dunya.
res <-hc_year %>% filter(regions != "_no_") %>% filter(who=="TUR_dunya") %>%
when.what(what = "regions",
subtop ="OR_EU",
title = "Share of EU in macroregional news published by Dunya",
test=TRUE)
res$table
res$plotly
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays
saveWidget(widget=res$plotly,
file = "widgets/widget_who_what.html",
selfcontained = T,
libdir="lib")
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays
---
title: "Geographical analysis of media"
subtitle: "5. Widgets"
author: "Claude Grasland"
output: html_notebook
---


```{r setup5, echo = FALSE, comment = FALSE, warning = FALSE, message = FALSE}
knitr::opts_chunk$set(echo = TRUE, comment = FALSE, warning = FALSE)
library(sf)
library(plotly)
library(RColorBrewer)
library(data.table)
library(dplyr)
library(knitr)
library(htmlwidgets)
#library(tidygraph)
#library(ggraph)
#library(visNetwork)
```



## Objective

The aim of this section is to present the different widgets used for the exploration of hypercubes and developped during the ODYCCEUS project. We adapt a little the initial programs for the case of octocubes that are used in IMAGEUN but the principles remains the same. Each widget will export a dataframe and a plotly figure, making possible to store the results in javascript and/or to use the table for development with another software.



## Preparation

### Load multilevel octocubes and transform in hypercubes

We load the octocubes at different levels of time agregation and transform them in hypercubes by removing the dual dimensions of states and regions

```{r}
base<-readRDS("octocubes/hc_mycorpus_covid_states_regions.RDS")
hc_day<-base$day[,.(tag=sum(tags),news=sum(news)),.(who,what,when,states=states1,regions=regions1)]
hc_week<-base$week[,.(tag=sum(tags),news=sum(news)),.(who,what,when,states=states1,regions=regions1)]
hc_month<-base$month[,.(tag=sum(tags),news=sum(news)),.(who,what,when,states=states1,regions=regions1)]
hc_year<-base$year[,.(tag=sum(tags),news=sum(news)),.(who,what,when,states=states1,regions=regions1)]
```


### Load statistical test function

```{r}
#### ---------------- testchi2 ----------------
#' @title  Compute the average salience of the topic and test significance of deviation
#' @name what
#' @description create a table and graphic of the topic
#' @param tabtest a table with variable trial, success and null.value
#' @param minsamp : Threshold of sample size requested for salience computation
#' @param mintest : Threshold of estimated value requested for chi-square test


testchi2<-function(tabtest=tabtest,
                   minsamp = 20,
                   mintest = 5) 
{
  tab<-tabtest
  n<-dim(tab)[1]
  
  # Compute salience if sample size sufficient (default : N>20)
  tab$estimate <-NA
  tab$salience <-NA
  tab$chi2<-NA
  tab$p.value<-NA
   tab$estimate<-round(tab$success/tab$trial,5)
   tab$salience<-tab$estimate/tab$null.value
  
  # Chi-square test if estimated value sufficient (default : Nij* > 5)
  
  for (i in 1:n) {
    if(tab$trial[i]*tab$null.value[i]>=mintest) {  
      test<-prop.test(x=tab$success[i],n=tab$trial[i], p=tab$null.value[i], 
                      alternative = "greater")
      tab$chi2[i]<-round(test$statistic,2)
      tab$p.value[i]<-round(test$p.value,5)
    } 
  }
 # }
  return(tab)
}

```

## What


### Function

```{r}
### ---------------- what ----------------
#' @title  Compute the average salience of the topic
#' @name what
#' @description create a table and graphic of the topic
#' @param hc an hypercube prepared as data.table
#' @param subtop a subtag of the main tag (default = NA)
#' @param title Title of the graphic


what <- function (hc = hypercube,
                  what = "what",
                  subtop = NA,
                  title = "What ?")
{
 
  
tab<-hc
tab$what<-tab[[what]]
if (is.na(subtop)){tab$what <-tab$what !="_no_"}else {tab$what <- tab$what == subtop}

tab<-tab[,list(news = sum(news)),by = what]
tab$pct<-100*tab$news/sum(tab$news)

p <- plot_ly(tab,
             labels = ~what,
             values = ~pct,
             type = 'pie') %>%
  layout(title = title,
         xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
         yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))

output<-list("table" = tab, "plotly" =p)

return(output)

}
```

### Application 1 : covid topic


```{r}
res<-what(hc_year)
res$table
res$plotly
```


### Application n°2 : state subtopic

```{r}
res <-hc_year %>% filter(states != "_no_") %>%
what(what = "states",
     subtop ="RUS",
     title = "Share of Russia in international news")
res$table
res$plotly
```

### Application n°3 : macroregion subtopic

```{r}
res <-hc_year %>% filter(regions != "_no_") %>%
what(what = "regions",
     subtop ="OR_EU",
     title = "Share of EU in macroregional news")
res$table
res$plotly
```

## Who.What

### function

```{r}
#### ---------------- who.what ----------------
#' @title  visualize variation of the topic between media
#' @name who.what
#' @description create a table of variation of the topic by media
#' @param hc an hypercube prepared as data.table
#' @param test : visualize test (TRUE) or salience (FALSE)
#' @param minsamp : Threshold of sample size requested for salience computation
#' @param mintest sample size of estimate for chi-square test (default = 5)
#' @param title Title of the graphic


who.what <- function (hc = hypercube,
                      what = "what",
                      subtop = NA,
                      test = FALSE,
                      minsamp = 20,
                      mintest = 5,
                      title = "Who says What ?")
{
  
  tab<-hc
  tab$what<-tab[[what]]
if (is.na(subtop)){tab$what <-tab$what !="_no_"}else {tab$what <- tab$what == subtop}
#  {tab$what <-tab$what !="_no_"}
  
  tab<-tab[,list(trial = sum(news),success=round(sum(news*what),0)),by = list(who)]
  ref <-round(sum(tab$success)/sum(tab$trial),4)
  tab$null.value<-ref
  
  tab<-testchi2(tabtest=tab,
                minsamp = minsamp,
                mintest = mintest)
  
  
  
  if (test==FALSE) {tab$index =tab$salience
  tab$index[tab$index>4]<-4
  tab<-tab[tab$trial > minsamp,]
  mycol<-brewer.pal(7,"YlOrRd")
  } 
  else {tab$index=1-tab$p.value
  tab<-tab[tab$trial*tab$null.value>mintest,]
  mycol<-rev(brewer.pal(7,"RdYlBu"))
  mycol[4]<-"lightyellow"
  }
  
  p <- plot_ly(tab,
               x = ~who,
               y = ~estimate*100,
               color= ~index,
               colors= mycol,
               hovertemplate = ~paste('Source: ',who,
                             '<br /> Total news  : ', round(trial,0),
                             '<br /> Topic news : ', round(success,0),
                             '<br /> % observed  : ', round(estimate*100,2),'%',
                             '<br /> % estimated : ', round(null.value*100,2),'%',
                             '<br /> Salience : ', round(salience,2),  
                             '<br /> p.value : ', round(p.value,4)),
               type = "bar")  %>%
    layout(title = title,
           yaxis = list(title = "% news"),
           barmode = 'stack')
  
  output<-list("table" = tab, "plotly" =p)
  
  return(output)
  
}
```

### Applicaton n°1 : Covid Topic

An example of computation of the share of a non spatial topic (Covid) in the full sample of news.

```{r}
res <- hc_year %>%
who.what(what = "what",
     title = "Share of Covid in total news",
     test=FALSE)

res$table
res$plotly
```



### Application n°2 : State subtopic

An example of computation of the share of a national subtopic (Russia) in the sample of news where the topic is present (news with at least one state mentionned).

```{r}
res <-hc_year %>% filter(states != "_no_") %>%
who.what(what = "states",
     subtop ="RUS",
     title = "Share of Russia in international news",
     test=TRUE)
res$table
res$plotly
```

### Application n°3 : Macroregion subtopic

Same example applied to macroregion : what is the share of the subtopic European Union in the subsample news where at least one macroregion is mentionned.

```{r}
res <-hc_year %>% filter(regions != "_no_") %>%
who.what(what = "regions",
     subtop ="OR_EU",
     title = "Share of EU in macroregional news",
     test=TRUE)
res$table
res$plotly
saveWidget(widget=res$plotly,
           file = "widgets/widget_who_what.html",
           selfcontained = T,
           libdir="lib")
```



## When.What

### function
```{r}

#### ---------------- when.what ----------------
#' @title  visualize variation of the topic through time
#' @name when.what
#' @description create a table of variation of the topic by media
#' @param test : visualize test (TRUE) or salience (FALSE)
#' @param minsamp : Threshold of sample size requested for salience computation
#' @param mintest sample size of estimate for chi-square test (default = 5)
#' @param title Title of the graphic


when.what <- function (hc = hypercube,
                       what = "what",
                       subtop = NA,
                       test = FALSE,
                       minsamp = 20,
                       mintest = 5,
                       title = "When is said What ?")
{
  
  tab<-hc
  tab$what<-tab[[what]]
if (is.na(subtop)){tab$what <-tab$what !="_no_"}else {tab$what <- tab$what == subtop}
#  {tab$what <-tab$what !="_no_"}
  
  tab<-tab[,list(trial = sum(news),success=round(sum(news*what),0)),by = list(when)]
  ref <-round(sum(tab$success)/sum(tab$trial),4)
  tab$null.value<-ref
  
  tab<-testchi2(tabtest=tab,
                minsamp = minsamp,
                mintest = mintest)
  
  if (test==FALSE) {tab$index =tab$salience
  tab<-tab[tab$trial > minsamp,]
  mycol<-brewer.pal(7,"YlOrRd")
  } 
  else {tab$index=tab$p.value
  tab<-tab[tab$trial*tab$null.value>mintest,]
  mycol<-brewer.pal(7,"RdYlBu")
  mycol[4]<-"lightyellow"
  }
  
  
  p <- plot_ly(tab,
               x = ~as.character(when),
               y = ~estimate*100,
               color= ~index,
               colors= mycol,
     #          hoverinfo = "text",
               hovertemplate = ~paste('Time: ',when,
                             '<br /> Total news  : ', round(trial,0),
                             '<br /> Topic news : ', round(success,0),
                             '<br /> % observed  : ', round(estimate*100,2),'%',
                             '<br /> % estimated : ', round(null.value*100,2),'%',
                             '<br /> Salience : ', round(salience,2),  
                             '<br /> p.value : ', round(p.value,4)),
               type = "bar")  %>%
    layout(title = title,
           yaxis = list(title = "% news"),
           barmode = 'stack')
  
  output<-list("table" = tab, "plotly" =p)
  
  return(output)
  
}
```



### Applicaton n°1 : Covid Topic

An example of computation of the share of a non spatial topic (Covid) in the full sample of news by week for one media.

```{r}
res <- hc_week %>% filter(who=="DEU_suddeu") %>%
when.what(what = "what",
     title = "Share of Covid-19 topic in news published by Süddeutsche Zeitung",
     test=FALSE)

res$table
res$plotly
```



### Application n°2 : State subtopic

Example of analysis of the share of news about Russia among news mentionning one country, by month, for Le Figaro.

```{r}
res <-hc_month %>% filter(states != "_no_") %>%  filter(who=="FRA_figaro") %>%
when.what(what = "states",
     subtop ="RUS",
     title = "Share of Russia in international news published by Le Figaro",
     test=TRUE)
res$table
res$plotly
```

### Application n°3 : Macroregion subtopic

Example of analysis of the share of news about European news among news mentioning one macroregion, by year, for Dunya.

```{r}
res <-hc_year %>% filter(regions != "_no_") %>%  filter(who=="TUR_dunya") %>%
when.what(what = "regions",
     subtop ="OR_EU",
     title = "Share of EU in macroregional news published by Dunya",
     test=TRUE)
res$table
res$plotly
saveWidget(widget=res$plotly,
           file = "widgets/widget_who_what.html",
           selfcontained = T,
           libdir="lib")
```